This is the companion R Markdown document to the following presentations that were delivered in Winter 2014-2015:

Adding the TIQ-TEST functions

## TIQ-TEST is not an R package, so it is sourced from its own directory.
## setwd() returns the previous working directory; it is kept in current.dir
## so the data path below can be built from where this document started.
tiqtest.dir <- file.path("..", "tiq-test")
current.dir <- setwd(tiqtest.dir)
source("tiq-test.R")

## Point TIQ-TEST at the data directory that lives in this repo
data.root <- file.path(current.dir, "data")
.tiq.data.setRootPath(data.root)
## INFO [2015-01-28 11:21:37 PST] pid=5826 tiq.data.setRootPath: Setting path to '/Users/alexcp/src/tiq-test-Winter2015/data'

Accessing the data using TIQ-TEST

We have roughly 2 months of data available on this public dataset:

outbound.dates <- tiq.data.getAvailableDates("raw", "public_outbound")
print(outbound.dates)
##  [1] "20141001" "20141002" "20141003" "20141004" "20141005" "20141006"
##  [7] "20141007" "20141008" "20141009" "20141010" "20141011" "20141012"
## [13] "20141013" "20141014" "20141015" "20141016" "20141017" "20141018"
## [19] "20141019" "20141020" "20141021" "20141022" "20141023" "20141024"
## [25] "20141025" "20141026" "20141027" "20141028" "20141029" "20141030"
## [31] "20141031" "20141101" "20141102" "20141103" "20141104" "20141105"
## [37] "20141106" "20141107" "20141108" "20141109" "20141110" "20141111"
## [43] "20141112" "20141113" "20141114" "20141115" "20141116" "20141117"
## [49] "20141118" "20141119" "20141120" "20141121" "20141122" "20141123"
## [55] "20141124" "20141125" "20141126" "20141127" "20141128" "20141129"
## [61] "20141130"
inbound.dates <- tiq.data.getAvailableDates("raw", "public_inbound")
print(inbound.dates)
##  [1] "20141001" "20141002" "20141003" "20141004" "20141005" "20141006"
##  [7] "20141007" "20141008" "20141009" "20141010" "20141011" "20141012"
## [13] "20141013" "20141014" "20141015" "20141016" "20141017" "20141018"
## [19] "20141019" "20141020" "20141021" "20141022" "20141023" "20141024"
## [25] "20141025" "20141026" "20141027" "20141028" "20141029" "20141030"
## [31] "20141031" "20141101" "20141102" "20141103" "20141104" "20141105"
## [37] "20141106" "20141107" "20141108" "20141109" "20141110" "20141111"
## [43] "20141112" "20141113" "20141114" "20141115" "20141116" "20141117"
## [49] "20141118" "20141119" "20141120" "20141121" "20141122" "20141123"
## [55] "20141124" "20141125" "20141126" "20141127" "20141128" "20141129"
## [61] "20141130"

This time, we also have a couple of private data feeds covering part of this period, but the information in them cannot be shared publicly as part of this release:

## Only list the dates when the private1 raw dataset is actually present
private1.available <- tiq.data.isDatasetAvailable("raw", "private1")
if (private1.available) {
  print(tiq.data.getAvailableDates("raw", "private1"))
}
##  [1] "20141001" "20141002" "20141004" "20141005" "20141006" "20141007"
##  [7] "20141008" "20141009" "20141010" "20141011" "20141012" "20141013"
## [13] "20141014" "20141015" "20141016" "20141017" "20141018" "20141019"
## [19] "20141020" "20141021" "20141022" "20141023" "20141024" "20141025"
## [25] "20141026" "20141027" "20141028" "20141029" "20141030" "20141031"
## [31] "20141101" "20141102" "20141103" "20141104" "20141105" "20141106"
## [37] "20141107" "20141108" "20141109" "20141110" "20141111" "20141112"
## [43] "20141113" "20141114" "20141115" "20141116" "20141117" "20141118"
## [49] "20141119" "20141120" "20141121" "20141122" "20141123" "20141124"
## [55] "20141125" "20141126" "20141127" "20141128" "20141129" "20141130"
## Only list the dates when the private2 raw dataset is actually present
private2.available <- tiq.data.isDatasetAvailable("raw", "private2")
if (private2.available) {
  print(tiq.data.getAvailableDates("raw", "private2"))
}
##  [1] "20141113" "20141114" "20141115" "20141116" "20141117" "20141118"
##  [7] "20141119" "20141120" "20141121" "20141122" "20141123" "20141124"
## [13] "20141125" "20141126" "20141127" "20141128" "20141129" "20141130"

This is an example of “RAW” (not enriched) outbound data imported from combine output

## Load the raw outbound indicators for 20141101 and show a subset of columns
outbound.ti <- tiq.data.loadTI("raw", "public_outbound", "20141101")
outbound.ti[, list(entity, type, direction, source, date)]
##                          entity type direction     source       date
##     1:             1.168.15.140 IPv4  outbound alienvault 2014-11-01
##     2:                1.93.6.86 IPv4  outbound alienvault 2014-11-01
##     3:             100.42.211.4 IPv4  outbound alienvault 2014-11-01
##     4:           101.227.172.24 IPv4  outbound alienvault 2014-11-01
##     5:             101.36.81.55 IPv4  outbound alienvault 2014-11-01
##    ---                                                              
## 11388:          up.frigo2000.it FQDN  outbound       zeus 2014-11-01
## 11389:          update.odeen.eu FQDN  outbound       zeus 2014-11-01
## 11390: update.rifugiopontese.it FQDN  outbound       zeus 2014-11-01
## 11391:       vahendkarasis4.com FQDN  outbound       zeus 2014-11-01
## 11392:           welcahllyn.com FQDN  outbound       zeus 2014-11-01

We can use the same loadTI function to also gather the enriched datasets:

## Load the enriched outbound indicators for 20141101, drop the notes column
## and show the last rows.
enrich.ti <- tiq.data.loadTI("enriched", "public_outbound", "20141101")
enrich.ti <- enrich.ti[, notes := NULL]
tail(enrich.ti)
##            entity type direction source       date asnumber
## 1:  94.102.63.153 IPv4  outbound   zeus 2014-11-01    29073
## 2:   94.103.36.55 IPv4  outbound   zeus 2014-11-01    47894
## 3:  95.163.121.12 IPv4  outbound   zeus 2014-11-01    12695
## 4: 98.131.185.136 IPv4  outbound   zeus 2014-11-01    32392
## 5: 98.131.185.136 IPv4  outbound   zeus 2014-11-01    32392
## 6:    99.181.5.83 IPv4  outbound   zeus 2014-11-01     7018
##                     asname country                       host
## 1:          Ecatel Network      NL                         NA
## 2: VeriTeknik Bilisim Ltd.      TR                         NA
## 3:   Digital Networks CJSC      RU                         NA
## 4:   Ecommerce Corporation      US                         NA
## 5:   Ecommerce Corporation      US projects.globaltronics.net
## 6:     AT&T Services, Inc.      US                         NA
##                                        rhost
## 1:                            exadomains.net
## 2:                 datacenter.veriteknik.com
## 3:                                        NA
## 4:                                        NA
## 5:                                        NA
## 6: adsl-99-181-5-83.dsl.irvnca.sbcglobal.net

This specific outbound dataset has the following sources included:

## Distinct values of the source column in the raw outbound data
outbound.ti <- tiq.data.loadTI("raw", "public_outbound", "20141101")
unique(outbound.ti[["source"]])
##  [1] "alienvault"        "feodo"             "malcode"          
##  [4] "malcode_zones"     "malwaredomainlist" "malwaredomains"   
##  [7] "malwaregroup"      "palevotracker"     "spyeye"           
## [10] "sslbl"             "zeus"

We can do the same for the inbound data we have to see the sources we have available:

## Distinct values of the source column in the raw inbound data
inbound.ti <- tiq.data.loadTI("raw", "public_inbound", "20141101")
unique(inbound.ti[["source"]])
##  [1] "alienvault"        "autoshun"          "blocklistde"      
##  [4] "botscout"          "bruteforceblocker" "charleshaley"     
##  [7] "ciarmy"            "dragonresearch"    "dshield"          
## [10] "honeypot"          "openbl"            "packetmail"       
## [13] "virbl"

Here are some results of running the Novelty test on the inbound data:

## Novelty test on the inbound data from 20141001 to 20141130,
## restricted to four selected sources
inbound.sources <- c("alienvault", "blocklistde", "dshield", "charleshaley")
inbound.novelty <- tiq.test.noveltyTest(
  "public_inbound", "20141001", "20141130",
  select.sources = inbound.sources
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=                                                                |   2%
  |                                                                       
  |==                                                               |   3%
  |                                                                       
  |===                                                              |   5%
  |                                                                       
  |====                                                             |   7%
  |                                                                       
  |=====                                                            |   8%
  |                                                                       
  |======                                                           |  10%
  |                                                                       
  |========                                                         |  12%
  |                                                                       
  |=========                                                        |  13%
  |                                                                       
  |==========                                                       |  15%
  |                                                                       
  |===========                                                      |  17%
  |                                                                       
  |============                                                     |  18%
  |                                                                       
  |=============                                                    |  20%
  |                                                                       
  |==============                                                   |  22%
  |                                                                       
  |===============                                                  |  23%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |=================                                                |  27%
  |                                                                       
  |==================                                               |  28%
  |                                                                       
  |====================                                             |  30%
  |                                                                       
  |=====================                                            |  32%
  |                                                                       
  |======================                                           |  33%
  |                                                                       
  |=======================                                          |  35%
  |                                                                       
  |========================                                         |  37%
  |                                                                       
  |=========================                                        |  38%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |===========================                                      |  42%
  |                                                                       
  |============================                                     |  43%
  |                                                                       
  |=============================                                    |  45%
  |                                                                       
  |==============================                                   |  47%
  |                                                                       
  |===============================                                  |  48%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |==================================                               |  52%
  |                                                                       
  |===================================                              |  53%
  |                                                                       
  |====================================                             |  55%
  |                                                                       
  |=====================================                            |  57%
  |                                                                       
  |======================================                           |  58%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |========================================                         |  62%
  |                                                                       
  |=========================================                        |  63%
  |                                                                       
  |==========================================                       |  65%
  |                                                                       
  |===========================================                      |  67%
  |                                                                       
  |============================================                     |  68%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |===============================================                  |  72%
  |                                                                       
  |================================================                 |  73%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |==================================================               |  77%
  |                                                                       
  |===================================================              |  78%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |=====================================================            |  82%
  |                                                                       
  |======================================================           |  83%
  |                                                                       
  |=======================================================          |  85%
  |                                                                       
  |========================================================         |  87%
  |                                                                       
  |=========================================================        |  88%
  |                                                                       
  |==========================================================       |  90%
  |                                                                       
  |============================================================     |  92%
  |                                                                       
  |=============================================================    |  93%
  |                                                                       
  |==============================================================   |  95%
  |                                                                       
  |===============================================================  |  97%
  |                                                                       
  |================================================================ |  98%
  |                                                                       
  |=================================================================| 100%
## Plot the novelty test results stored in inbound.novelty
tiq.test.plotNoveltyTest(inbound.novelty)

And results running on the outbound data:

## Novelty test on the outbound data from 20141001 to 20141130,
## restricted to four selected sources
outbound.sources <- c("alienvault", "malwaregroup", "malcode", "zeus")
outbound.novelty <- tiq.test.noveltyTest(
  "public_outbound", "20141001", "20141130",
  select.sources = outbound.sources
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=                                                                |   2%
  |                                                                       
  |==                                                               |   3%
  |                                                                       
  |===                                                              |   5%
  |                                                                       
  |====                                                             |   7%
  |                                                                       
  |=====                                                            |   8%
  |                                                                       
  |======                                                           |  10%
  |                                                                       
  |========                                                         |  12%
  |                                                                       
  |=========                                                        |  13%
  |                                                                       
  |==========                                                       |  15%
  |                                                                       
  |===========                                                      |  17%
  |                                                                       
  |============                                                     |  18%
  |                                                                       
  |=============                                                    |  20%
  |                                                                       
  |==============                                                   |  22%
  |                                                                       
  |===============                                                  |  23%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |=================                                                |  27%
  |                                                                       
  |==================                                               |  28%
  |                                                                       
  |====================                                             |  30%
  |                                                                       
  |=====================                                            |  32%
  |                                                                       
  |======================                                           |  33%
  |                                                                       
  |=======================                                          |  35%
  |                                                                       
  |========================                                         |  37%
  |                                                                       
  |=========================                                        |  38%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |===========================                                      |  42%
  |                                                                       
  |============================                                     |  43%
  |                                                                       
  |=============================                                    |  45%
  |                                                                       
  |==============================                                   |  47%
  |                                                                       
  |===============================                                  |  48%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |==================================                               |  52%
  |                                                                       
  |===================================                              |  53%
  |                                                                       
  |====================================                             |  55%
  |                                                                       
  |=====================================                            |  57%
  |                                                                       
  |======================================                           |  58%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |========================================                         |  62%
  |                                                                       
  |=========================================                        |  63%
  |                                                                       
  |==========================================                       |  65%
  |                                                                       
  |===========================================                      |  67%
  |                                                                       
  |============================================                     |  68%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |===============================================                  |  72%
  |                                                                       
  |================================================                 |  73%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |==================================================               |  77%
  |                                                                       
  |===================================================              |  78%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |=====================================================            |  82%
  |                                                                       
  |======================================================           |  83%
  |                                                                       
  |=======================================================          |  85%
  |                                                                       
  |========================================================         |  87%
  |                                                                       
  |=========================================================        |  88%
  |                                                                       
  |==========================================================       |  90%
  |                                                                       
  |============================================================     |  92%
  |                                                                       
  |=============================================================    |  93%
  |                                                                       
  |==============================================================   |  95%
  |                                                                       
  |===============================================================  |  97%
  |                                                                       
  |================================================================ |  98%
  |                                                                       
  |=================================================================| 100%
## Plot the novelty test results stored in outbound.novelty
tiq.test.plotNoveltyTest(outbound.novelty)

Overlap Test

This is an example of applying the Overlap Test to our inbound dataset

  ## Overlap Test on the enriched inbound data for 20141101, with no source
  ## filter supplied (select.sources is NULL), then build and print the plot.
  overlap <- tiq.test.overlapTest(
    "public_inbound", "20141101", "enriched",
    select.sources = NULL
  )
  overlap.plot <- tiq.test.plotOverlapTest(
    overlap,
    title = "Overlap Test - Inbound Data - 20141101"
  )
  print(overlap.plot)

Similarly, an example applying the Overlap Test to the outbound dataset

  ## Overlap Test on the enriched outbound data for 20141101, with no source
  ## filter supplied (select.sources is NULL), then build and print the plot.
  overlap <- tiq.test.overlapTest(
    "public_outbound", "20141101", "enriched",
    select.sources = NULL
  )
  overlap.plot <- tiq.test.plotOverlapTest(
    overlap,
    title = "Overlap Test - Outbound Data - 20141101"
  )
  print(overlap.plot)

With the population data, we can generate some plots to compare the top quantities of reported IP addresses on a specific date by country.

  ## Extract the "country" population from the outbound and inbound feeds for
  ## 20141111. split.ti is spelled out as FALSE: F is an ordinary variable that
  ## can be reassigned, whereas FALSE is a reserved word.
  outbound.pop <- tiq.test.extractPopulationFromTI(
    "public_outbound", "country",
    date = "20141111",
    select.sources = NULL, split.ti = FALSE
  )
  inbound.pop <- tiq.test.extractPopulationFromTI(
    "public_inbound", "country",
    date = "20141111",
    select.sources = NULL, split.ti = FALSE
  )

  ## NOTE(review): the recorded run warned that no dates were available for
  ## this population (path '.../population/mmgeo/NA.csv.gz' is invalid), so
  ## complete.pop may not contain data -- confirm the mmgeo files are present.
  complete.pop <- tiq.data.loadPopulation("mmgeo", "country")
## Warning in max(tiq.data.getAvailableDates(category, group)): no
## non-missing arguments, returning NA
## WARN [2015-01-28 11:23:14 PST] pid=5826 tiq.data.loadTI: path '/Users/alexcp/src/tiq-test-Winter2015/data/population/mmgeo/NA.csv.gz' is invalid. No data available on date 'NA'.
  ## Combine the three populations and plot them by country
  all.pops <- c(inbound.pop, outbound.pop, complete.pop)
  tiq.test.plotPopulationBars(all.pops, "country")

## Aging test on the outbound data from 20141001 to 20141130
outbound.aging <- tiq.test.agingTest("public_outbound", "20141001", "20141130")
tiq.test.plotAgingTest(outbound.aging)

## Same date range for the inbound data
inbound.aging <- tiq.test.agingTest("public_inbound", "20141001", "20141130")
tiq.test.plotAgingTest(inbound.aging)

## Outbound again with split.ti disabled. FALSE is used instead of F because
## F is an ordinary variable that can be reassigned.
outbound.aging <- tiq.test.agingTest(
  "public_outbound", "20141001", "20141130",
  split.ti = FALSE
)
tiq.test.plotAgingTest(outbound.aging)

## Private feed with split.ti disabled
private.aging <- tiq.test.agingTest(
  "private1", "20141001", "20141130",
  split.ti = FALSE
)
## WARN [2015-01-28 11:26:55 PST] pid=5826 tiq.data.loadTI: path '/Users/alexcp/src/tiq-test-Winter2015/data/enriched/private1/20141003.csv.gz' is invalid. No data available on date '20141003'.
## Plot the aging test for the private feed with density.limit set to 0.7
tiq.test.plotAgingTest(private.aging, density.limit = 0.7)

  ## Country populations of the public outbound and private1 feeds on 20141110.
  ## FALSE is used instead of F because F is an ordinary, reassignable variable.
  outbound.pop <- tiq.test.extractPopulationFromTI(
    "public_outbound", "country",
    date = "20141110",
    select.sources = NULL, split.ti = FALSE
  )
  private.pop <- tiq.test.extractPopulationFromTI(
    "private1", "country",
    date = "20141110",
    select.sources = NULL, split.ti = FALSE
  )

  tiq.test.plotPopulationBars(
    c(private.pop, outbound.pop), "country",
    title = "Comparing Private1 and Public Feeds on 20141110"
  )

## NOTE(review): the argument is spelled split.tii here but split.ti in the
## other calls in this document -- confirm which name noveltyTest expects.
private.novelty <- tiq.test.noveltyTest(
  "private1", "20141001", "20141130",
  split.tii = FALSE
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=                                                                |   2%
  |                                                                       
  |==                                                               |   3%WARN [2015-01-28 11:27:04 PST] pid=5826 tiq.data.loadTI: path '/Users/alexcp/src/tiq-test-Winter2015/data/raw/private1/20141003.csv.gz' is invalid. No data available on date '20141003'.
## 
  |                                                                       
  |===                                                              |   5%
  |                                                                       
  |====                                                             |   7%
  |                                                                       
  |=====                                                            |   8%
  |                                                                       
  |======                                                           |  10%
  |                                                                       
  |========                                                         |  12%
  |                                                                       
  |=========                                                        |  13%
  |                                                                       
  |==========                                                       |  15%
  |                                                                       
  |===========                                                      |  17%
  |                                                                       
  |============                                                     |  18%
  |                                                                       
  |=============                                                    |  20%
  |                                                                       
  |==============                                                   |  22%
  |                                                                       
  |===============                                                  |  23%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |=================                                                |  27%
  |                                                                       
  |==================                                               |  28%
  |                                                                       
  |====================                                             |  30%
  |                                                                       
  |=====================                                            |  32%
  |                                                                       
  |======================                                           |  33%
  |                                                                       
  |=======================                                          |  35%
  |                                                                       
  |========================                                         |  37%
  |                                                                       
  |=========================                                        |  38%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |===========================                                      |  42%
  |                                                                       
  |============================                                     |  43%
  |                                                                       
  |=============================                                    |  45%
  |                                                                       
  |==============================                                   |  47%
  |                                                                       
  |===============================                                  |  48%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |==================================                               |  52%
  |                                                                       
  |===================================                              |  53%
  |                                                                       
  |====================================                             |  55%
  |                                                                       
  |=====================================                            |  57%
  |                                                                       
  |======================================                           |  58%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |========================================                         |  62%
  |                                                                       
  |=========================================                        |  63%
  |                                                                       
  |==========================================                       |  65%
  |                                                                       
  |===========================================                      |  67%
  |                                                                       
  |============================================                     |  68%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |===============================================                  |  72%
  |                                                                       
  |================================================                 |  73%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |==================================================               |  77%
  |                                                                       
  |===================================================              |  78%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |=====================================================            |  82%
  |                                                                       
  |======================================================           |  83%
  |                                                                       
  |=======================================================          |  85%
  |                                                                       
  |========================================================         |  87%
  |                                                                       
  |=========================================================        |  88%
  |                                                                       
  |==========================================================       |  90%
  |                                                                       
  |============================================================     |  92%
  |                                                                       
  |=============================================================    |  93%
  |                                                                       
  |==============================================================   |  95%
  |                                                                       
  |===============================================================  |  97%
  |                                                                       
  |================================================================ |  98%
  |                                                                       
  |=================================================================| 100%
# Plot the Novelty Test result held in private.novelty
# (presumably produced by an earlier tiq.test.noveltyTest call -- confirm).
tiq.test.plotNoveltyTest(private.novelty)

# Run the Novelty Test on the "public_outbound" group between "20141001" and
# "20141130" and keep the result for plotting.
# FALSE is spelled out: `F` is an ordinary variable that can be reassigned,
# whereas FALSE is a reserved constant.
# NOTE(review): the overlap tests in this document pass `split.ti`; confirm
# that `split.tii` is the argument name tiq.test.noveltyTest expects.
outbound.novelty = tiq.test.noveltyTest("public_outbound", "20141001", "20141130",
                                        split.tii=FALSE)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=                                                                |   2%
  |                                                                       
  |==                                                               |   3%
  |                                                                       
  |===                                                              |   5%
  |                                                                       
  |====                                                             |   7%
  |                                                                       
  |=====                                                            |   8%
  |                                                                       
  |======                                                           |  10%
  |                                                                       
  |========                                                         |  12%
  |                                                                       
  |=========                                                        |  13%
  |                                                                       
  |==========                                                       |  15%
  |                                                                       
  |===========                                                      |  17%
  |                                                                       
  |============                                                     |  18%
  |                                                                       
  |=============                                                    |  20%
  |                                                                       
  |==============                                                   |  22%
  |                                                                       
  |===============                                                  |  23%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |=================                                                |  27%
  |                                                                       
  |==================                                               |  28%
  |                                                                       
  |====================                                             |  30%
  |                                                                       
  |=====================                                            |  32%
  |                                                                       
  |======================                                           |  33%
  |                                                                       
  |=======================                                          |  35%
  |                                                                       
  |========================                                         |  37%
  |                                                                       
  |=========================                                        |  38%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |===========================                                      |  42%
  |                                                                       
  |============================                                     |  43%
  |                                                                       
  |=============================                                    |  45%
  |                                                                       
  |==============================                                   |  47%
  |                                                                       
  |===============================                                  |  48%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |==================================                               |  52%
  |                                                                       
  |===================================                              |  53%
  |                                                                       
  |====================================                             |  55%
  |                                                                       
  |=====================================                            |  57%
  |                                                                       
  |======================================                           |  58%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |========================================                         |  62%
  |                                                                       
  |=========================================                        |  63%
  |                                                                       
  |==========================================                       |  65%
  |                                                                       
  |===========================================                      |  67%
  |                                                                       
  |============================================                     |  68%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |===============================================                  |  72%
  |                                                                       
  |================================================                 |  73%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |==================================================               |  77%
  |                                                                       
  |===================================================              |  78%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |=====================================================            |  82%
  |                                                                       
  |======================================================           |  83%
  |                                                                       
  |=======================================================          |  85%
  |                                                                       
  |========================================================         |  87%
  |                                                                       
  |=========================================================        |  88%
  |                                                                       
  |==========================================================       |  90%
  |                                                                       
  |============================================================     |  92%
  |                                                                       
  |=============================================================    |  93%
  |                                                                       
  |==============================================================   |  95%
  |                                                                       
  |===============================================================  |  97%
  |                                                                       
  |================================================================ |  98%
  |                                                                       
  |=================================================================| 100%
# Plot the Novelty Test result computed above for the "public_outbound" group.
tiq.test.plotNoveltyTest(outbound.novelty)

This is an example of applying the Overlap Test to our inbound dataset:

  # Overlap Test for "public_inbound" on "20141101" using "enriched" data
  # (positional arguments are presumably group / date / data type -- confirm).
  # select.sources is passed explicitly as NULL.
  overlap = tiq.test.overlapTest(
    "public_inbound",
    "20141101",
    "enriched",
    select.sources = NULL
  )

  # Keep the plot object in overlap.plot, then print it explicitly.
  overlap.plot = tiq.test.plotOverlapTest(
    overlap,
    title = "Overlap Test - Inbound Data - 20141101"
  )
  print(overlap.plot)

  # Overlap Test across two groups, "public_outbound" and "private1", on
  # "20141101" using "enriched" data.
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned,
  # whereas FALSE is a reserved constant.
  # Note: this assignment replaces any earlier value of `overlap`.
  overlap = tiq.test.overlapTest(c("public_outbound", "private1"), "20141101", "enriched",
                                 split.ti=FALSE, select.sources=NULL)
  tiq.test.plotOverlapTest(overlap, title="OVERLAP - public_outbound VS private1 - 20141101")

  # Overlap Test across two groups, "public_outbound" and "private2", on
  # "20141115" using "enriched" data; the result is kept in overlap2.
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned,
  # whereas FALSE is a reserved constant.
  overlap2 = tiq.test.overlapTest(c("public_outbound", "private2"), "20141115", "enriched",
                                  split.ti=FALSE, select.sources=NULL)
  tiq.test.plotOverlapTest(overlap2, title="OVERLAP - public_outbound VS private2 - 20141115")